# Load the OWID COVID-19 CSV, keep only the columns used below (under shorter
# names), and drop a fixed list of locations.
data <- read.csv("owid-covid-data.csv") %>%
  # Selecting with `new = old` renames the column while keeping this order.
  select(
    continent,
    location,
    cases = total_cases,
    deaths = total_deaths,
    vac1 = people_vaccinated,
    vac2 = people_fully_vaccinated,
    date,
    pop = population
  ) %>%
  # Locations excluded by the original author, who tagged this list "NA's".
  # NOTE(review): presumably aggregate rows or rows with missing fields;
  # confirm against the dataset.
  filter(
    !(location %in% c(
      "World",
      "Asia",
      "Europe",
      "North America",
      "European Union",
      "South America",
      "Africa",
      "Oceania",
      "International",
      "Northern Cyprus"
    ))
  )

# Print the column types and first values of the cleaned data.
glimpse(data)
## Rows: 112,772
## Columns: 8
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, ~
## $ location <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Afghanis~
## $ cases <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 11, 11, 11, ~
## $ deaths <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vac1 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vac2 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ date <fct> 2020-02-24, 2020-02-25, 2020-02-26, 2020-02-27, 2020-02-28, ~
## $ pop <dbl> 39835428, 39835428, 39835428, 39835428, 39835428, 39835428, ~
## continent location cases deaths
## : 0 Argentina : 630 Min. : 1 Min. : 1
## Africa :30055 Mexico : 630 1st Qu.: 1796 1st Qu.: 63
## Asia :27371 Peru : 630 Median : 16850 Median : 482
## Europe :27795 Thailand : 627 Mean : 457126 Mean : 11681
## North America:15185 Taiwan : 615 3rd Qu.: 165550 3rd Qu.: 4085
## Oceania : 5398 South Korea: 610 Max. :42410607 Max. :678407
## South America: 6968 (Other) :109030 NA's :5685 NA's :16223
## vac1 vac2 date
## Min. :0.000e+00 Min. :1.000e+00 2021-06-21: 219
## 1st Qu.:1.559e+05 1st Qu.:7.920e+04 2021-06-22: 219
## Median :9.441e+05 Median :6.225e+05 2021-06-23: 219
## Mean :8.855e+06 Mean :5.664e+06 2021-06-24: 219
## 3rd Qu.:4.753e+06 3rd Qu.:3.372e+06 2021-06-25: 219
## Max. :1.101e+09 Max. :1.022e+09 2021-06-26: 219
## NA's :89772 NA's :92634 (Other) :111458
## pop
## Min. :4.700e+01
## 1st Qu.:1.933e+06
## Median :8.715e+06
## Mean :4.099e+07
## 3rd Qu.:2.967e+07
## Max. :1.444e+09
##
# Within each location: fill missing values downward from the previous
# non-missing row, replace values that are still missing with 0, and express
# each metric as a percentage of `pop`. Then keep only rows dated in 2021 and
# drop `pop`. The result remains grouped by `location`.
data <- data %>%
  group_by(location) %>%
  fill(cases, deaths, vac1, vac2, .direction = "down") %>%
  mutate(
    cases = 100 * replace_na(cases, 0) / pop,
    deaths = 100 * replace_na(deaths, 0) / pop,
    vac1 = 100 * replace_na(vac1, 0) / pop,
    vac2 = 100 * replace_na(vac2, 0) / pop
  ) %>%
  # `date` is converted on the fly only to test the year; the column itself
  # is left unchanged.
  filter(year(as_date(date)) == 2021) %>%
  select(-pop)
## continent location cases
## : 0 Afghanistan : 264 Min. : 0.0000
## Africa :14348 Albania : 264 1st Qu.: 0.1039
## Asia :12634 Algeria : 264 Median : 1.0669
## Europe :13304 Andorra : 264 Mean : 2.8146
## North America: 8263 Angola : 264 3rd Qu.: 4.7026
## Oceania : 3883 Antigua and Barbuda: 264 Max. :21.3093
## South America: 3235 (Other) :54083
## deaths vac1 vac2 date
## Min. :0.000000 Min. : 0.000 Min. : 0.0000 2021-06-21: 219
## 1st Qu.:0.001084 1st Qu.: 0.111 1st Qu.: 0.0000 2021-06-22: 219
## Median :0.012629 Median : 4.544 Median : 0.8798 2021-06-23: 219
## Mean :0.050606 Mean : 17.437 Mean : 11.0644 2021-06-24: 219
## 3rd Qu.:0.081345 3rd Qu.: 29.046 3rd Qu.: 13.5420 2021-06-25: 219
## Max. :0.596731 Max. :118.346 Max. :117.1322 2021-06-26: 219
## (Other) :54353
## Rows: 55,667
## Columns: 7
## Groups: location [223]
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, ~
## $ location <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Afghanis~
## $ cases <dbl> 0.1318249, 0.1320081, 0.1323169, 0.1328190, 0.1330750, 0.133~
## $ deaths <dbl> 0.005525232, 0.005550336, 0.005575439, 0.005598032, 0.005615~
## $ vac1 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ vac2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ date <fct> 2021-01-01, 2021-01-02, 2021-01-03, 2021-01-04, 2021-01-05, ~
# Sanity check: print every row that still contains an NA in any column.
filter_all(data, any_vars(is.na(.)))
# Countries to highlight (the plot title refers to them as the top 15 by GDP).
# The entries are meant to match values of `data$location`; the previous
# entry 'French' is not a country name and has been corrected to 'France'.
# NOTE(review): this variable shares its name with base::names(); it is kept
# because code outside this section may refer to it.
names <- c('Brazil',
           'United States',
           'Canada',
           'Mexico',
           'Germany',
           'United Kingdom',
           'France',
           'Italy',
           'Spain',
           'Russia',
           'India',
           'South Korea',
           'China',
           'Japan',
           'Australia')

# Manual colour palette, one colour per group.
# NOTE(review): the vector is unnamed, so scale_colour_manual() assigns the
# colours by the order of the levels being plotted. The labels below only
# hold if those levels are exactly in this order; confirm, or name the values.
colors <- c('#F28B30', # Asia (orange)
            '#BF0A3A', # Europe (red)
            '#022873', # North America (blue)
            '#F23D6D', # Oceania (pink)
            'gray',    # Others (gray)
            '#03A62C') # South America (green)

# describe(data)
# NOTE(review): the call above was fused onto the end of the last colour
# comment, so it never ran. It is kept commented out because the package
# that provides describe() is not visible in this section.
# Bubble chart: cases (x) against deaths (y), with point size mapped to vac2
# and point colour mapped to continent, using the manual `colors` palette.
# NOTE(review): the title mentions the top 15 GDP countries, but this code
# plots every row of `data`; confirm whether a filter on `names` is expected.
p <- data %>%
  ggplot(aes(x = cases,
             y = deaths,
             size = vac2)) +
  geom_point(aes(color = continent), alpha = 0.6) +
  scale_size(range = c(.1, 24), name = "fully vaccinated") +
  scale_colour_manual(values = colors) +
  # Points outside these limits are dropped from the plot (ggplot2 reports
  # them in a "Removed ... rows" warning).
  xlim(-1, 20) +
  ylim(-0.07, .61) +
  theme_classic() +
  theme(legend.position = c(0.83, 0.86)) +
  # Hide the size legend; only the colour legend is shown.
  guides(size = 'none') +
  # Title typo fixed: "GPD" -> "GDP".
  labs(title = "COVID-19 vaccinations of top 15 GDP countries")

# Render the static ggplot as an interactive plotly widget.
ggplotly(p)
## Warning: Removed 31 rows containing missing values (geom_point).
